package third.spider.downloader;

import java.io.IOException;
import java.nio.charset.Charset;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import fun.codedesign.yinxue.util.IOUtil;
import third.spider.DownLoaderException;
import third.spider.Downloader;
import third.spider.Downloader.Mode;
import third.spider.transport.HttpClientBuilder;
import third.spider.transport.HttpClientPool;

/**
 * @author zengjian
 * @create 2018-07-12 15:51
 * @since 1.0.0
 */
public class SimpleDownloader implements Downloader {

    private final Logger LOGGER = LoggerFactory.getLogger(this.getClass());

    public static final String DEFAULT_HOST = "10.37.235.10";
    public static final Integer DEFAULT_PORT = 8080;

    /**
     * 取网页charset正则表达式
     */
    private static final Pattern CHARSET_PATTERN = Pattern.compile("<meta [a-z-\"A-Z= /;]*charset=\"?([a-z0-9-]+)\" */?>");

    /**
     * 代理ip
     */
    private String proxyHost;

    /**
     * 代理端口
     */
    private Integer proxyPort;

    /**
     * 客户端池
     */
    private HttpClientPool httpClientPool = new HttpClientPool(new HttpClientBuilder());

    /**
     * 采用代理还是连直连方式
     */
    private Mode mode = Mode.DIRECT;

    /**
     * read阻塞最大时间
     */
    private static final int SOCKET_TIME = 30000;

    /**
     * TCP3次握手连接建立最大时间
     */
    private static final int CONNECT_TIME = 3000;

    public SimpleDownloader() {
    }

    public SimpleDownloader(String proxyHost, Integer proxyPort) {
        this.proxyHost = proxyHost;
        this.proxyPort = proxyPort;
        mode = Mode.PROXY;
    }

    /**
     * 返回Json字符串
     *
     * @param url
     * @param param
     * @return
     */
    public String downloadJson(final String url, final Object param) {
        return null;
    }

    public String downloadHtml(final String url) throws DownLoaderException {
        CloseableHttpClient client = null;
        CloseableHttpResponse response = null;
        HttpEntity entity = null;
        HttpClientContext context = null;
        HttpUriRequest request = null;
        String html = "";
        try {
            client = httpClientPool.borrowObject();
            request = buildRequest(url);
            context = HttpClientContext.create();
            // 先按utf-8取得页面
            html = doDownload(request, entity, response, context, client, "utf-8");
            // 如果为其他编码格式再执行一次
            String charset = extractCharset(html, "utf-8");
            LOGGER.info("解析网页编码格式为:" + charset.toString());
            if (!"utf-8".equalsIgnoreCase(charset)) {
                html = doDownload(request, entity, response, context, client, charset);
            }
            return html;
        } catch (Exception e) {
            LOGGER.error("客户端下载页面异常，执行请求失败", e);
            throw new DownLoaderException(e);
        } finally {
            try {
                // 关闭流才能返回连接
                EntityUtils.consume(entity);
            } catch (IOException e) {
                LOGGER.error("关闭响应实体类异常，url地址:{}", url, e);
            }
            // 如果连接已经归还，那什么都不做
//            IOUtil.close(response);
            if (client != null) {
                httpClientPool.returnObject(client);
            }
        }
    }

    private HttpUriRequest buildRequest(String url) {
        if (mode == Mode.DIRECT) {
            return buildHttpGet(url);
        }
        if (mode == Mode.PROXY) {
            return buildProxyHttpGet(url, new HttpHost(proxyHost, proxyPort));
        }
        throw new IllegalStateException("即不是直连也不是代理模式");
    }

    private String extractCharset(String html, String defaultCode) {
        // 用正则表达式匹配
        Matcher matcher = CHARSET_PATTERN.matcher(html);
        while (matcher.find()) {
            String charset = matcher.group(1);
            return charset;
        }
        return defaultCode;
    }

    private String doDownload(HttpUriRequest request, HttpEntity entity, CloseableHttpResponse response,
                              HttpClientContext context, CloseableHttpClient client, String charset) throws IOException {
        String html = "";
        response = client.execute(request, context);
        entity = response.getEntity();
        if (response != null && response.getStatusLine() != null && response.getStatusLine().getStatusCode() == 200) {
            html = EntityUtils.toString(entity, charset);
        }
        return html;
    }


    /**
     * 创建Get请求
     *
     * @param url
     * @return
     */
    public static HttpUriRequest buildHttpGet(String url) {
        HttpGet request = new HttpGet(url);
        RequestConfig config = RequestConfig.custom().build();
        request.setConfig(config);
        return request;
    }

    /**
     * 创建代理Get请求
     *
     * @param url
     * @param HttpHost 代理地址
     * @return
     */
    public static HttpUriRequest buildProxyHttpGet(String url, HttpHost proxy) {
        HttpGet request = new HttpGet(url);
        request.setConfig(RequestConfig.custom()
                .setSocketTimeout(SOCKET_TIME)
                .setConnectTimeout(CONNECT_TIME)
                .setProxy(proxy)
                //.setCookieSpec(COOKIE)
                .build());
        return request;
    }

    public HttpClientPool getHttpClientPool() {
        return httpClientPool;
    }

    public void setHttpClientPool(HttpClientPool httpClientPool) {
        this.httpClientPool = httpClientPool;
    }

    public Mode getMode() {
        return mode;
    }

    public void setMode(Mode mode) {
        this.mode = mode;
    }



}
